We’re going to look at Instacart data.

# Load datasets
library(tidyverse)
## ── Attaching core tidyverse packages ────────────────────
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ─────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(p8105.datasets)
## 
## Attaching package: 'p8105.datasets'
## 
## The following object is masked _by_ '.GlobalEnv':
## 
##     instacart
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Reload the packaged dataset, then narrow it to the rows and columns used by
# the plots below: five departments, order_hour_of_day values from 8 through
# 22 (both ends included), and six columns.
data(instacart)
instacart <- instacart %>%
  janitor::clean_names() %>%
  filter(
    department %in% c("produce", "frozen", "dairy eggs", "pantry", "household"),
    order_hour_of_day >= 8,
    order_hour_of_day <= 22
  ) %>%
  select(
    department,
    reordered,
    order_hour_of_day,
    days_since_prior_order,
    order_dow,
    add_to_cart_order
  )

scatterplot

This plot examines whether there is a relationship between the time since a customer’s last order and the likelihood of reordering.

# Plot the share of reordered items for each value of days_since_prior_order.
# Plotting the raw rows puts every marker at y = 0 or y = 1 (see the original
# axis label), so the markers stack on two horizontal lines and cannot show
# how likely a reorder is. Averaging the 0/1 indicator within each value of
# days_since_prior_order gives that likelihood directly and draws one marker
# per distinct x value instead of one per row.
instacart %>%
  group_by(days_since_prior_order) %>%
  summarize(
    reorder_rate = mean(reordered, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  plot_ly(
    x = ~days_since_prior_order,
    y = ~reorder_rate,
    type = "scatter",
    mode = "markers",
    marker = list(opacity = 0.6, size = 5)
  ) %>%
  layout(
    title = "Scatter Plot of Days Since Prior Order vs Reordered",
    xaxis = list(title = "Days Since Prior Order"),
    yaxis = list(title = "Proportion Reordered")
  )

boxplot

This plot investigates how the add-to-cart order varies across different days of the week. Specifically, it can help answer questions such as whether the typical add-to-cart position is higher on some days than on others.

# One box per order_dow value showing the spread of add_to_cart_order;
# order_dow is wrapped in factor() for the colour mapping so each day is
# coloured as a separate group.
plot_ly(
  data = instacart,
  x = ~order_dow,
  y = ~add_to_cart_order,
  color = ~factor(order_dow),
  colors = "viridis",
  type = "box"
) %>%
  layout(
    title = "Distribution of Add-to-Cart Order by Day of the Week",
    xaxis = list(title = "Day of the Week"),
    yaxis = list(title = "Add to Cart Order")
  )

barplot

This plot investigates which departments tend to have shorter or longer gaps between orders. This could reveal whether certain types of products are purchased more frequently.

# Mean of days_since_prior_order per department (missing values ignored).
# NOTE(review): this object shares its name with graphics::barplot(); consider
# renaming it once nothing else refers to `barplot`.
barplot <- summarize(
  group_by(instacart, department),
  avg_days_since_prior_order = mean(days_since_prior_order, na.rm = TRUE)
)

# One bar per department, coloured by department.
plot_ly(
  data = barplot,
  x = ~department,
  y = ~avg_days_since_prior_order,
  color = ~department,
  type = "bar"
) %>%
  layout(
    title = "Average Days Since Prior Order by Department",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Average Days Since Prior Order")
  )